from paddleocr import PaddleOCR
from pdf2image import convert_from_path
import numpy as np

import logging

# Silence every log record at WARNING severity and below (DEBUG, INFO, WARNING)
# for all loggers in the process. A single call is enough: logging.disable()
# keeps only the most recent threshold, so a preceding disable(DEBUG) would
# simply be overwritten.
logging.disable(logging.WARNING)

# Build the OCR engine once and reuse it for every page.
# `lang` selects the recognition model, e.g. 'en', 'ch', 'fr'.
ocr = PaddleOCR(use_angle_cls=True, lang='ch')

# Rasterize the PDF into one image per page so each page can be fed to OCR.
images = convert_from_path(
    pdf_path=r'D:\Desktop\图片\高山杨山河施工合同.pdf',
    poppler_path=r'D:\rjbao\poppler\poppler-24.08.0\Library\bin',
    fmt="ppm",
    dpi=600,
)
for image in images:
    # The OCR call below is given a NumPy array rather than the page object.
    image = np.array(image)
    result = ocr.ocr(image, cls=True)
    # NOTE: PaddleOCR 2.x has been observed to return None (or a list
    # containing None) for an image where no text is detected; skip those
    # instead of crashing on iteration.
    for lines in result or []:
        if not lines:
            continue
        for line in lines:
            # Print the first element of the entry's second field, which is
            # the recognized text.
            print(line[1][0])

